home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip: Internet
/
Chip Internet.iso
/
wwwutil
/
hotjava.ins
/
hotjava.exe
/
hotjava
/
classsrc
/
net
/
www
/
html
/
Document.java
< prev
next >
Wrap
Text File
|
1995-08-11
|
5KB
|
184 lines
/*
* @(#)Document.java 1.15 95/05/10 Jonathan Payne
*
* Copyright (c) 1994 Sun Microsystems, Inc. All Rights Reserved.
*
* Permission to use, copy, modify, and distribute this software
* and its documentation for NON-COMMERCIAL purposes and without
* fee is hereby granted provided that this copyright notice
* appears in all copies. Please refer to the file "copyright.html"
* for further important copyright and licensing information.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
*/
package net.www.html;
import java.util.*;
import java.io.*;
import awt.DisplayItem;
/**
 * net.www.html.Document is a class which holds an html document.
 * An html document is parsed text, a vector of tags, and
 * optional html source text.  Document is used by html.Parser to
 * store parsed documents, but it is also used to help
 * programmatically generate html documents.  E.g., the ftp
 * protocol handler might generate an html document out of a
 * directory listing.
 */
public class Document {
    /**
     * The head tag, so we can tell when we've finished parsing
     * the head of a document.  The reason we care is that all the
     * elements inside a head should really not be rendered.  This
     * is where we'll stick the &lt;title&gt; and perhaps other
     * extensions such as a digital signature.  The problem is,
     * mosaic and other viewers out there don't mind if there is
     * a &lt;head&gt; without the corresponding &lt;/head&gt;, so many
     * documents in the wild omit it.
     * Not referenced inside this class itself.
     */
    static Tag HEADtag = Tag.lookup("head");

    /** If this is non-null, it contains the html source that this
        document was built from.  Not cleared by reset(). */
    protected String htmlSource;

    /** This is the text of the html document, with the tags and
        extra white space removed.  May be null (e.g. after reset()). */
    protected byte text[];

    /** The TagRef objects added so far, in the order they were added;
        each one records an offset into the text. */
    protected Vector tags;

    /** Title of this html document, or null if not known yet. */
    protected String title;

    /** This is whether or not we're in a &lt;pre&gt; section.  &lt;pre&gt;
        sections cannot be nested, and we enforce that here, so we
        don't need to keep a count. */
    protected boolean inPREelement = false;

    /** Create an empty document.  Note that this calls reset(), which
        subclasses are able to override. */
    public Document() {
        reset();
    }

    /**
     * Remember the html source this document was built from.
     *
     * @param source the source text; stored as-is (may be null)
     */
    public final void setSource(String source) {
        htmlSource = source;
    }

    /**
     * Return the document to its empty state: no text, no cached
     * title, no tags, and not inside a &lt;pre&gt; section.  The tag
     * vector is reused if it already exists.  The html source set with
     * setSource() is left alone.
     */
    public void reset() {
        text = null;
        title = null;
        // A document that was reset in the middle of a <pre> must not
        // start its next life still believing it is inside one.
        inPREelement = false;
        if (tags == null) {
            tags = new Vector();
        } else {
            tags.setSize(0);
        }
    }

    /**
     * This adds a pre-made TagRef to the document, and sets its
     * position to the given offset.  While inside a &lt;pre&gt; section,
     * tags that break (other than BR and HR) are ignored with a
     * warning on System.out; the closing &lt;/pre&gt; ends the section.
     *
     * @param ref    the tag reference to add; null is tolerated
     * @param offset the position to record in ref.pos
     * @return ref if it was added, or null if ref was null or the tag
     *         was ignored because it appeared inside a &lt;pre&gt; section
     */
    protected final TagRef addTagRef(TagRef ref, int offset) {
        if (ref == null) {
            return null;
        }

        Tag theTag = ref.tag;
        int tagID = theTag.id;

        if (inPREelement) {
            if (tagID == Tag.PRE && ref.isEnd) {
                inPREelement = false;
            } else if (theTag.breaks && tagID != Tag.BR && tagID != Tag.HR) {
                System.out.println("Warning: Ignoring: " + theTag + " inside <pre> section");
                return null;
            }
        } else if (tagID == Tag.PRE && !ref.isEnd) {
            // Only an opening <pre> starts a section.  A stray </pre>
            // outside of one is recorded below like any other tag but
            // must not switch PRE mode on.
            inPREelement = true;
        }

        ref.pos = offset;
        tags.addElement(ref);
        return ref;
    }

    /**
     * Create a new start tag ref at the given offset.  This creates a
     * generic TagRef object; subclasses may override it to create
     * specific TagRef subclasses.
     *
     * @param t      the tag being opened
     * @param offset position of the tag
     * @return the TagRef that was added, or null if it was ignored
     *         (see addTagRef)
     */
    public TagRef startTag(Tag t, int offset) {
        return addTagRef(new TagRef(t, offset, false), offset);
    }

    /**
     * Create a new end tag ref at the given offset.  This creates a
     * generic TagRef object; subclasses may override it to create
     * specific TagRef subclasses.
     *
     * @param t      the tag being closed
     * @param offset position of the tag
     * @return the TagRef that was added, or null if it was ignored
     *         (see addTagRef)
     */
    public TagRef endTag(Tag t, int offset) {
        return addTagRef(new TagRef(t, offset, true), offset);
    }

    /** Return the vector of tag refs.  This is the live vector, not a copy. */
    public Vector getTags() {
        return tags;
    }

    /** Return the text array.  This is the internal array, not a copy;
        it may be null. */
    public byte getText()[] {
        return text;
    }

    /** Set the text array.  It is not copied. */
    public void setText(byte text[]) {
        this.text = text;
    }

    /**
     * Set the text from a string.  Each character is narrowed to a
     * single byte, so only the low 8 bits of every character are kept.
     *
     * @param str the new text; must not be null
     */
    public void setText(String str) {
        byte converted[] = new byte[str.length()];
        for (int i = 0; i < converted.length; i++) {
            converted[i] = (byte) str.charAt(i);
        }
        text = converted;
    }

    /**
     * This returns the title for this document, as defined by the
     * &lt;title&gt; &lt;/title&gt; tag pairs.  If no title is found, "" is
     * returned.
     *
     * A title that was successfully extracted is cached in the title
     * field.  A failed lookup is not cached, so a call made before the
     * title tags or the text are available does not hide a title that
     * shows up later.
     *
     * @return the title text, or "" if it cannot be determined yet
     */
    public String getTitle() {
        if (title != null) {
            return title;
        }
        if (tags == null) {
            return "";
        }

        Tag titleTag = Tag.lookup("title");
        int start = -1;     // position of the most recent <title>, -1 if none yet
        int count = tags.size();

        for (int i = 0; i < count; i++) {
            TagRef ref = (TagRef) tags.elementAt(i);
            if (ref.tag != titleTag) {
                continue;
            }
            if (!ref.isEnd) {
                start = ref.pos;
                continue;
            }

            /* We are at the first </title>.  Check to make sure we
               found the <title> begin tag!  If not, there is no title. */
            if (start == -1) {
                return "";
            }
            int length = ref.pos - start;
            // The text may not have been set yet, or the tag offsets may
            // not fit it; report "no title" rather than throwing.
            if (text == null || start < 0 || length < 0 || ref.pos > text.length) {
                return "";
            }
            // hibyte 0: each byte becomes the character with the same
            // low 8 bits, mirroring the narrowing done in setText(String).
            title = new String(text, 0, start, length);
            return title;
        }
        return "";
    }
}